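# datasets/pbmc3k.py
# NOTE(review): despite the path above, this module registers the scikit-learn
# "digits" dataset loader -- confirm the intended file name.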
from __future__ import annotations

from typing import Optional, Dict, Any

import numpy as np

from .base import DatasetSpec
from .registry import register
from .transforms import default_preprocess
import sklearn.datasets 

@register("digits")
def load_digits(
    *,
    pca_n: int = 50,
    random_state=None,
    **kwargs,
) -> DatasetSpec:
    """
    Load the scikit-learn digits dataset and wrap it in a DatasetSpec.

    The data comes from ``sklearn.datasets.load_digits(return_X_y=True)``.
    The feature matrix is converted to float32, the targets returned by
    scikit-learn are used as the labels, and no batch information is attached.

    ``pca_n``, ``random_state`` and any additional keyword arguments are
    accepted but are not used by this loader.

    Returns a DatasetSpec named "digits" whose ``meta`` records the source
    dataset name together with the number of points and feature dimensions.
    """
    features, targets = sklearn.datasets.load_digits(return_X_y=True)
    # copy=False avoids a needless copy when the array is already float32.
    features = features.astype(np.float32, copy=False)

    dims = features.shape
    info: Dict[str, Any] = dict(
        sklearn_dataset="digits",
        n_points=dims[0],
        n_dim=dims[1],
    )

    return DatasetSpec(
        name="digits",
        X=features,
        labels=targets,
        batch=None,
        meta=info,
    )
